# Load packages -----------------------------------------------------------

library("magrittr")
library("ggplot2")
library("mice")
library("ineq")
library("foreign")
library("knitr")
library("multiwayvcov")
library("stargazer")
library("lmtest")
library("glmnet")
library("xtable")
library("MASS")
library("rms")
library("coefplot")
library("dplyr")
library("haven")
library("unikn")
library("ggsignif")
library("broom")
library("tidyverse")
library("estimatr")



# Load data ---------------------------------------------------------------


# Uganda ------------------------------------------------------------------

# Uganda 1
# The codes 999, 888 and 777 are passed as `na.strings`, so fields holding
# exactly these values are read in as NA.
# NOTE(review): supplying `na.strings` replaces the default "NA" -- confirm
# the raw file has no literal NA entries that should also count as missing.
ug1 <- read.csv(
  file = file.path("01_data", "uganda1.csv"),
  stringsAsFactors = FALSE,
  na.strings = c(999, 888, 777)
)

# Uganda 2
# The second Uganda data set is spread over several CSV files. Every file is
# read into its own data.frame; `ug2` is the list of these data.frames, in
# the order in which the files are listed below.
# NOTE(review): unlike the Uganda 1 import, 777 is not part of `na.strings`
# here -- confirm that this is intended.
ug2 <- lapply(
  X = file.path(
    "01_data",
    c(
      "uganda2_1.csv",
      "uganda2_2.csv",
      "uganda2_3_1.csv",
      "uganda2_3_2.csv",
      "uganda2_3.csv",
      "uganda2_4.csv",
      "uganda2_5.csv"
    )
  ),
  FUN = read.csv,
  stringsAsFactors = FALSE,
  na.strings = c("999", "888", "", " ", "NA")
)

# Harmonise the columns of the Uganda 2 files so that they can be stacked:
# every file gets the same set of variables, in the same order.

# All variable names that occur in any of the files (duplicates included)
all_vars <- unlist(lapply(ug2, names))

# For each file: the variables that occur in some other file but not in it.
# setdiff() keeps the order of first appearance, which later determines where
# the added columns end up in the first file.
missing_vars <- lapply(ug2, function(x) setdiff(all_vars, names(x)))

# Put any missing variables in, filled with NA
for (i in seq_along(ug2)) {
  ug2[[i]][, missing_vars[[i]]] <- NA
}

# Order columns according to the order of the first file.
# drop = FALSE keeps a data.frame even if only one column is selected.
var_names <- names(ug2[[1]])
for (i in seq_along(ug2)) {
  ug2[[i]] <- ug2[[i]][, var_names, drop = FALSE]
}

# Sanity check: every file must now have exactly the same column names in
# exactly the same order. identical() also catches differing lengths, which
# an element-wise `==` would silently recycle.
order_check <- all(vapply(
  ug2,
  function(x) identical(names(x), var_names),
  logical(1)
))
# If this breaks, something went wrong with the above
stopifnot(order_check)

# rbind the data into a single data.frame
ug2 <- do.call(what = rbind.data.frame, args = ug2)

# The variable types look as follows (one label per column; multiple classes
# of a single column are pasted together):
table(vapply(
  ug2,
  function(column) paste(class(column), collapse = "/"),
  character(1)
))

rm(var_names, order_check, missing_vars, all_vars)

# Uganda 3
# The third Uganda data set also comes in several CSV files. `ug3` is the
# list of the resulting data.frames, one per file and in the order listed.

ug3 <- lapply(
  X = file.path(
    "01_data",
    c(
      "uganda3_1.csv",
      "uganda3_2.csv",
      "uganda3_3.csv",
      "uganda3_4.csv"
    )
  ),
  FUN = read.csv,
  stringsAsFactors = FALSE,
  na.strings = c("999", "888", "", " ", "NA")
)


# Fixing mistakes in survey that were changed during data collection
#
# In the first two Uganda 3 files the answers to q5_1c - q5_1f are moved
# between the profession prefixes (official, judge, doctor, teacher, police,
# clergy, principal). Exactly the same reassignment is applied to both files,
# so it is written down once in a helper and applied twice.
# NOTE(review): presumably files 3 and 4 were collected after the survey was
# corrected and therefore need no reassignment -- confirm.

# Untouched copies of the two files; they serve as the source of all values
# and are removed again further down the script.
ug3_1_original <- ug3[[1]]
ug3_2_original <- ug3[[2]]

# Returns `data` with the q5_1c - q5_1f columns reassigned.
# Every value is read from `original`, an unmodified copy of the same file,
# so a column that was already overwritten is never used as a source.
reassign_q5_1_columns <- function(data, original) {
  within(
    data = data,
    expr = {
      official_q5_1c <- original$teacher_q5_1c
      judge_q5_1c <- original$police_q5_1c
      doctor_q5_1c <- original$clergy_q5_1c
      teacher_q5_1c <- original$principal_q5_1c
      police_q5_1c <- original$official_q5_1c
      clergy_q5_1c <- original$judge_q5_1c
      principal_q5_1c <- original$doctor_q5_1c

      # For q5_1d the doctor and teacher columns are left as they are
      official_q5_1d <- original$clergy_q5_1d
      judge_q5_1d <- original$principal_q5_1d
      police_q5_1d <- original$official_q5_1d
      clergy_q5_1d <- original$judge_q5_1d
      principal_q5_1d <- original$police_q5_1d

      official_q5_1e <- original$judge_q5_1e
      judge_q5_1e <- original$doctor_q5_1e
      doctor_q5_1e <- original$teacher_q5_1e
      teacher_q5_1e <- original$police_q5_1e
      police_q5_1e <- original$clergy_q5_1e
      clergy_q5_1e <- original$principal_q5_1e
      principal_q5_1e <- original$official_q5_1e

      official_q5_1f <- original$judge_q5_1f
      judge_q5_1f <- original$doctor_q5_1f
      doctor_q5_1f <- original$clergy_q5_1f
      teacher_q5_1f <- original$principal_q5_1f
      police_q5_1f <- original$teacher_q5_1f
      clergy_q5_1f <- original$police_q5_1f
      principal_q5_1f <- original$official_q5_1f
    }
  )
}

ug3[[1]] <- reassign_q5_1_columns(data = ug3[[1]], original = ug3_1_original)
ug3[[2]] <- reassign_q5_1_columns(data = ug3[[2]], original = ug3_2_original)

# The helper is only needed here
rm(reassign_q5_1_columns)

# Harmonise the columns of the Uganda 3 files so that they can be stacked:
# every file gets the same set of variables, in the same order.

# All variable names that occur in any of the files (duplicates included)
all_vars <- unlist(lapply(ug3, names))

# For each file: the variables that occur in some other file but not in it.
# setdiff() keeps the order of first appearance, which later determines where
# the added columns end up in the first file.
missing_vars <- lapply(ug3, function(x) setdiff(all_vars, names(x)))

# Put any missing variables in, filled with NA
for (i in seq_along(ug3)) {
  ug3[[i]][, missing_vars[[i]]] <- NA
}

# Order columns according to the order of the first file.
# drop = FALSE keeps a data.frame even if only one column is selected.
var_names <- names(ug3[[1]])
for (i in seq_along(ug3)) {
  ug3[[i]] <- ug3[[i]][, var_names, drop = FALSE]
}

# Sanity check: every file must now have exactly the same column names in
# exactly the same order. identical() also catches differing lengths, which
# an element-wise `==` would silently recycle.
order_check <- all(vapply(
  ug3,
  function(x) identical(names(x), var_names),
  logical(1)
))
# If this breaks, something went wrong with the above
stopifnot(order_check)

# rbind the data into a single data.frame
ug3 <- do.call(what = rbind.data.frame, args = ug3)

# The variable types look as follows (one label per column; multiple classes
# of a single column are pasted together):
table(vapply(
  ug3,
  function(column) paste(class(column), collapse = "/"),
  character(1)
))


rm(ug3_1_original,
   ug3_2_original,
   var_names,
   order_check,
   missing_vars,
   all_vars)

# Tanzania ----------------------------------------------------------------

# NOTE(review): `stringsAsFactors` is not set for these imports, unlike the
# Uganda ones, so under R < 4.0 string columns are read as factors -- confirm
# that this is intended.

# Tanzania 1
tan <- read.csv("01_data/tanzania1.csv")

# Tanzania 2
tan2 <- read.csv("01_data/tanzania2.csv")

# Tanzania 3
# Stata file; the result of read_dta() is converted to a plain data.frame.
tan3 <- as.data.frame(read_dta("01_data/tanzania3.dta"))

# South Africa ------------------------------------------------------------

# Read with the read.csv() defaults (first row is taken as the header).
# NOTE(review): `stringsAsFactors` is not set, so under R < 4.0 string columns
# are read as factors -- confirm that this is intended.
sa <- read.csv("01_data/south_africa.csv")

# Afrobarometer -----------------------------------------------------------

# Read with the read.csv() defaults as well.
afro <- read.csv("01_data/afrobarometer.csv")

# Prison data ------------------------------------------------------------

# The file is read without a header row (header = FALSE), so the columns are
# named explicitly afterwards. FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned.
icpr <- read.csv(
  file = "01_data/icpr.csv",
  stringsAsFactors = FALSE,
  header = FALSE
)
names(icpr) <- c("Country", "women_prison_pop")






